Setup

Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/

## Library Preparation
library(clusterProfiler)
library(ReactomePA)
library(DESeq2)
library(dplyr)
library(tidyverse)
library(presto)
library(ashr)
library(GOSemSim)
library(org.Ce.eg.db)
library(bookdown)
library(knitr)
library(DT)
library(htmltools)
library(kableExtra)
# Call functions
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/translateBioIDs.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/callClusterProfilerFunc.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/compileOntologyReport.R")
source("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/scripts/emptyTableMessage.R")

Function called for Analysis

compileOntologyReport.R

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #  
# #         Calls translateBioIDs.R and geneOntologyAnalysis.R        # # 
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 
compileOntologyReport <- function(result.obj, strain, fraction, l2fc_filter, padj_filter){ 
   
  # Create comment structure for bookdown render 
  cat(sprintf("# %s %s {.title .tabset}\n\n", strain, fraction)) 
  #cat(paste0("# ", strain," ", fraction, " {.title .tabset}\n\n")) 
 
  # Show exact call which invoked function 
  call_txt <- paste(deparse(match.call()), collapse = "\n") 
  cat("**Inputs for function call:**\n\n") 
  cat("```r\n") 
  cat(call_txt, "\n\n") 
  cat("```\n\n") 
   
  # Translate Wormbase IDs to ENTREZID 
  result.obj.list <- translateBioIDs( 
    DESeqResults.obj = result.obj, 
    bioID = "WORMBASE", 
    l2fc_filter = l2fc_filter, 
    padj_filter = padj_filter 
  ) 
   
  # Run gruopGO(), enrichGO(), gseGO(), and goplot() 
  go.result.list <- callClusterProfilerFunc( 
    result.obj.list = result.obj.list, 
    OrgDb = org.Ce.eg.db) 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #     Print text and tables to console for bookdown rendering     # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   
  knitDataTable <- function(df, tableName, fileExtension){ 
    if(nrow(data.frame(df[[1]])) != 0){ 
      table <- list(datatable(data.frame(df[[1]]), 
                            extensions = 'Buttons', 
                            rownames = FALSE, 
                            caption = tableName, 
                            options = list( 
                              dom = "Bfrtip", 
                              buttons = list('copy', 
                                 list(extend = 'csv', filename = sprintf("%s_%s_%s", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension)), 
                                 list(extend = 'excel', filename = sprintf("%s_%s_%s", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension)) 
                              ), 
                              pageLength = 5, 
                              scrollX = TRUE))) 
      print(htmltools::tagList(table[[1]])) 
      cat("\n\n") 
      write.csv(df[[1]], file = sprintf("./results/%s_%s_%s.csv", gsub(" ", "_", tolower(strain)), tolower(fraction), fileExtension)) 
    }else{emptyTableMessage(tableName)} 
     
    # clean up environment 
    rm(table) 
  } 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # # #          Dysregulated Reactome Pathway GSE, GO GSE          # # #  
  cat(paste0("\n\n## Dysregulated\n\n")) 
   
  # Reactome Pathway GSE 
  cat("**Reactome Pathway GSEA**") 
  knitDataTable( 
    df = go.result.list$react_gse, 
    tableName = " Reactome Gene Set Enrichment Analysis", 
    fileExtension = "Reactome_gsea" 
  ) 
   
  # # # GSEA plot condiitonal rendering of RP GSEA plots 
  # Tabset for Reactome GSEA plots 
  cat("#### {.tabset .tabset-dropdown}\n\n") 
 
  # Print GSEA plot for each react pathway set identified  
  for (i in 1:nrow(data.frame(go.result.list$react_gse[[1]]))) { 
    if (i > 30) {break} # Dont print over 30 plots 
    cat(sprintf("##### %s \n\n", go.result.list$react_gse[[1]]$Description[i])) 
    print(gseaplot( 
      go.result.list$react_gse[[1]],  
      by = "all",  
      title = go.result.list$react_gse[[1]]$Description[i],  
      geneSetID = go.result.list$react_gse[[1]]$ID[i])) 
    cat("\n\n") 
  } 
   
  # GO GSE 
  cat("### \n\n") # Break out of previous tabset 
  cat("**GO GSEA**") 
  knitDataTable( 
    df = go.result.list$go_gse, 
    tableName = " GSE Of All Differentially Expressed Genes", 
    fileExtension = "GO_gsea" 
  ) 
   
  # # # GSEA plot condiitonal rendering 
  # Tabset for GSEA plots 
  cat("#### {.tabset .tabset-dropdown}\n\n") 
 
  # Print GSEA plot for each set identified  
  for (i in 1:nrow(data.frame(go.result.list$go_gse[[1]]))) { 
    if (i > 30) {break} # Dont print over 30 plots 
    cat(sprintf("##### %s \n\n", go.result.list$go_gse[[1]]$Description[i])) 
    print(gseaplot( 
      go.result.list$go_gse[[1]],  
      by = "all",  
      title = go.result.list$go_gse[[1]]$Description[i],  
      geneSetID = go.result.list$go_gse[[1]]$ID[i])) 
    cat("\n\n") 
  } 
 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # # #    Upregulated GO classification, GO over-representation, RP over-representation    # # # 
  cat(paste0("\n\n## Upregulated\n\n")) 
   
  # GO classification 
  knitDataTable( 
    df = go.result.list$go_class_upreg, 
    tableName = " GO Classification of Upregulated Genes", 
    fileExtension = "GO_classification_upreg" 
  ) 
   
  # GO over-representation 
  knitDataTable( 
    df = go.result.list$go_overrep_upreg, 
    tableName = " GO Over-representation Analysis of Upregulated Genes", 
    fileExtension = "GO_overrepresented_upreg" 
  ) 
   
  # RP over-representation 
  knitDataTable( 
    df = go.result.list$react_overrep_upreg, 
    tableName = " Reactome Pathway Over-representation Analysis of Upregulated Genes", 
    fileExtension = "Reactome_overrepresented_upreg" 
  ) 
   
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # # #    Downregulated GO classification, GO over-representation, RP over-representation    # # # 
  cat(paste0("\n\n## Downregulated\n\n")) 
   
  # GO classification 
  knitDataTable( 
    df = go.result.list$go_class_downreg, 
    tableName = " GO Classification of Downregulated Genes", 
    fileExtension = "GO_classification_downreg" 
  ) 
 
  # GO over-representation 
  knitDataTable( 
    df = go.result.list$go_overrep_downreg, 
    tableName = " GO Over-representation Analysis of Downregulated Genes", 
    fileExtension = "GO_overrepresented_downreg" 
  ) 
   
  # RP over-representation 
  knitDataTable( 
    df = go.result.list$react_overrep_downreg, 
    tableName = " Reactome Pathway Over-representation Analysis of Downregulated Genes", 
    fileExtension = "Reactome_overrepresented_downreg" 
  ) 
 
} 

translateBioIDs.R

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# #                       Translating Biological IDS                      # # 
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# # # From WormBase to ENTREZID 
 
translateBioIDs <- function (DESeqResults.obj, bioID, return.success = TRUE, l2fc_filter, padj_filter) { 
   
  if (bioID != "WORMBASE"){ 
    stop("⛔️ ERROR: Only WORMBASE biological ID supported as input.") 
  } 
   
  # Convert DESeq2 results object to DataFrame, and edit structure 
  gene.df <- DESeqResults.obj %>% 
    data.frame() %>% 
    rownames_to_column("WORMBASE") %>% 
    dplyr::select(WORMBASE, log2FoldChange, pvalue, padj) 
   
  # Biological ID TranslatoR 
  gene_ids <- bitr(gene.df[,'WORMBASE'], 
                   fromType = "WORMBASE", 
                   toType = "ENTREZID", 
                   OrgDb = org.Ce.eg.db) 
   
  # Record success of ID transfer 
  t1 <- table(gene.df$WORMBASE %in% gene_ids$WORMBASE) 
   
  # Clear duplicate genes following ID transfer 
  gene_ids <- gene_ids %>% 
    distinct(WORMBASE, .keep_all = T) %>% 
    column_to_rownames("WORMBASE") 
   
  # Add ENTREZID to L2FC carrying DataFrame 
  gene.df <- gene.df %>% 
    mutate(ENTREZID = gene_ids[WORMBASE, 1]) %>% 
    dplyr::select(ENTREZID, log2FoldChange, pvalue, padj) %>% 
    subset(!is.na(ENTREZID)) %>% # remove NA ENTREZ ID values 
    arrange(desc(log2FoldChange)) # <--- arrange genes by L2FC descending 
  rownames(gene.df) <- NULL 
   
  # #    Subset L2FC Data    # #  
   
  # Subset for highly varirable genes 
  variable.genes <- gene.df %>% 
    subset(abs(log2FoldChange) > l2fc_filter & padj < padj_filter & !isNA(padj)) 
  rownames(variable.genes) <- NULL 
   
  # Subset for highly upregulated genes 
  upregulated.genes <- gene.df %>% 
    subset(log2FoldChange > l2fc_filter & padj < padj_filter & !isNA(padj)) 
  rownames(upregulated.genes) <- NULL 
   
  # Subset for highly downregulated genes 
  downregulated.genes <- gene.df %>% 
    subset(log2FoldChange < -l2fc_filter & padj < padj_filter & !isNA(padj)) 
  rownames(downregulated.genes) <- NULL 
   
  # Subset for all genes (universe DataFrame) 
  all.genes <- gene.df %>% 
    subset(!isNA(padj)) %>% 
    arrange(desc(log2FoldChange)) 
   
  # Vectorized differentially expressed genes 
  vectorized <- all.genes[,2] 
  names(vectorized) <- all.genes[,1] 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                     Print Results Following Subset                    # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 
  # Record transfer success 
  if (return.success == TRUE){ 
    cat("\n") 
    cat("**Transfering WORMBASE gene IDs to ENTREZID:**\n\n") 
    cat("```r\n") 
    cat(paste0(t1[["FALSE"]]," gene IDs were not transfered from WORMBASE to ENTREZID \n")) 
    cat(paste0(t1[["TRUE"]]," gene IDs were successfully transfered from WORMBASE to ENTREZID \n\n")) 
    cat("```\n\n") 
  } 
   
  # report number of highly variable genes used for analysis 
  cat(sprintf("**Number of highly variable genes identified (with | L2FC | > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter))) 
  cat("```r\n") 
  cat(length(variable.genes[[1]])) 
  cat("\n\n```\n\n") 
   
  # report number of highly variable genes used for analysis 
  cat(sprintf("**Number of highly upregulated genes identified (with L2FC > %s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter))) 
  cat("```r\n") 
  cat(length(upregulated.genes[[1]])) 
  cat("\n\n```\n\n") 
   
  # report number of highly variable genes used for analysis 
  cat(sprintf("**Number of highly downregulated genes identified (with L2FC < -%s and padj < %s) is:**\n\n", toString(l2fc_filter) , toString(padj_filter))) 
  cat("```r\n") 
  cat(length(downregulated.genes[[1]])) 
  cat("\n\n```\n\n") 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                               Return Data                             # #  
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # Save all data to list 
  gene.list <- list( 
    variable.genes,  
    upregulated.genes,  
    downregulated.genes,  
    all.genes,  
    vectorized 
    ) 
   
  names(gene.list) <- c( 
    "variable_genes",  
    "upregulated_genes",  
    "downregulated_genes",  
    "all_genes",  
    "vectorized_all_DE" 
    ) 
   
  return(gene.list) 
} 

callClusterProfilerFunc.R

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# #         Gene Ontology Enrichment Analysis         # # 
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 
callClusterProfilerFunc <- function(result.obj.list, OrgDb){ 
   
  # # Extract filtered lists from list container 
  geneNames.variable <- result.obj.list$variable_genes[,1] 
  geneNames.up <- result.obj.list$upregulated_genes[,1] 
  geneNames.down <- result.obj.list$downregulated_genes[,1] 
  geneList <- result.obj.list$vectorized_all_DE 
  universe <- result.obj.list$all_genes[,1] 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                       Analysis of Upregulated Genes                   # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  ## RUN WITH BP ONTOLOGY 
  # # GO Classification # # 
  go_class_upreg <- groupGO( 
      gene = geneNames.up, 
      OrgDb = OrgDb, 
      ont = "BP", 
      level = 3, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
  # # GO Over-Representation Analysis # # 
  go_overrep_upreg <- enrichGO( 
      gene = geneNames.up, 
      universe = universe, 
      OrgDb = OrgDb, 
      ont = "BP", 
      pAdjustMethod = "BH", 
      pvalueCutoff = 0.1, 
      qvalueCutoff = 0.1, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
  # # Reactome Pathway Over-Representation Analysis # #  
  reactome_overrep_upreg <- enrichPathway( 
      gene = geneNames.up,  
      universe = universe, 
      organism = 'celegans', 
      pvalueCutoff = 0.05,  
      qvalueCutoff = 0.1, 
      minGSSize = 20, 
      maxGSSize = 500, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                      Analysis of Downregulated Genes                  # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   
  # # GO Classification # # 
  go_class_downreg <- groupGO( 
      gene = geneNames.down, 
      OrgDb = OrgDb, 
      ont = "BP", 
      level = 3, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
  # # GO Over-Representation Analysis # # 
  go_overrep_downreg <- enrichGO( 
      gene = geneNames.down, 
      universe = universe, 
      OrgDb = OrgDb, 
      ont = "BP", 
      pAdjustMethod = "BH", 
      pvalueCutoff = 0.1, 
      qvalueCutoff = 0.1, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
  # # Reactome Pathway Over-Representation Analysis # #  
  reactome_overrep_downreg <- enrichPathway( 
      gene = geneNames.down,  
      universe = universe, 
      organism = 'celegans', 
      pvalueCutoff = 0.05,  
      qvalueCutoff = 0.1, 
      minGSSize = 20, 
      maxGSSize = 500, 
      readable = TRUE) %>% 
    arrange(desc(Count)) 
   
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                    GO Gene Set Enrichment Analysis                    # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  go_gene_set_enrichment <- gseGO( 
    geneList = geneList, 
        OrgDb = OrgDb, 
        ont = "BP", 
        minGSSize = 20, 
        maxGSSize = 500, 
        pvalueCutoff = 0.1, 
        verbose = FALSE 
        ) 
   
  # change gene IDs from ENTREZID to symbol 
  go_gene_set_enrichment <- setReadable(go_gene_set_enrichment, OrgDb = OrgDb, keyType="ENTREZID") 
   
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                       Reactome Enrichment Analysis                    # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 
  reactome_gene_set_enrichment <- gsePathway( 
    geneList = geneList,  
    organism = 'celegans', 
    pvalueCutoff = 0.2, 
    pAdjustMethod = "BH",  
    minGSSize = 20, 
    maxGSSize = 500, 
    verbose = FALSE 
    ) 
   
  # change Reactome GSE gene IDs from ENTREZID to symbol 
  reactome_gene_set_enrichment <- setReadable(reactome_gene_set_enrichment, OrgDb = OrgDb, keyType="ENTREZID") 
   
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  # #                             Return Results                            # # 
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   
  go_results <- list( 
    list(go_class_upreg), 
    list(go_overrep_upreg),  
    list(reactome_overrep_upreg), 
    list(go_class_downreg), 
    list(go_overrep_downreg),  
    list(reactome_overrep_downreg), 
    list(go_gene_set_enrichment), 
    list(reactome_gene_set_enrichment) 
    ) 
   
  names(go_results) <- c( 
    "go_class_upreg",  
    "go_overrep_upreg",  
    "react_overrep_upreg", 
    "go_class_downreg",  
    "go_overrep_downreg",  
    "react_overrep_downreg", 
    "go_gse", 
    "react_gse" 
    ) 
   
  return(go_results) 
} 

Read in Data

# Lysate
lys <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Lysate_BatchCor_dds.rds")

# Polysome
ply <- readRDS("~/Documents/research_labs/mdibl_fall_2025/updike_lotr1_go_analysis/intermediate_data/deseq2_dds_objects/lotr1_isolatedStrain_Polysome_BatchCor_dds.rds")

Get DESeq2 Results

# Full KO vs WT
full_vs_wt.lys <- results(lys, name = "strain_DUP207_vs_DUP167")
full_vs_wt.ply <- results(ply, name = "strain_DUP207_vs_DUP167")

# Tudor KO vs WT 
tudor_vs_wt.lys <- results(lys, name = "strain_DUP185_vs_DUP167")
tudor_vs_wt.ply <- results(ply, name = "strain_DUP185_vs_DUP167")

# LOTUS KO vs WT
lotus_vs_wt.lys <- results(lys, name = "strain_DUP217_vs_DUP167")
lotus_vs_wt.ply <- results(ply, name = "strain_DUP217_vs_DUP167")

Full LOTR-1 KO vs WT Lysate

Inputs for function call:

compileOntologyReport(result.obj = full_vs_wt.lys, strain = "Full LOTR-1 KO vs WT", 
    fraction = "Lysate", l2fc_filter = 0.8, padj_filter = 0.1) 

Transfering WORMBASE gene IDs to ENTREZID:

542 gene IDs were not transfered from WORMBASE to ENTREZID 
15111 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:

300

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:

254

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:

46

Dysregulated

Reactome Pathway GSEA

GTP hydrolysis and joining of the 60S ribosomal subunit

SRP-dependent cotranslational protein targeting to membrane

Translation

Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)

Eukaryotic Translation Initiation

Cap-dependent Translation Initiation

L13a-mediated translational silencing of Ceruloplasmin expression

Formation of a pool of free 40S subunits

Nonsense-Mediated Decay (NMD)

Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)

Metabolism of RNA

Cellular responses to stress

Cellular responses to stimuli

Translation initiation complex formation

Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S

Ribosomal scanning and start codon recognition

Cellular response to hypoxia

Chromatin modifying enzymes

Chromatin organization

DNA Replication

DNA Replication Pre-Initiation

Hedgehog ‘on’ state

Gene Silencing by RNA

Positive epigenetic regulation of rRNA expression

B-WICH complex positively regulates rRNA expression

Transcriptional regulation by small RNAs

MAPK6/MAPK4 signaling

Assembly of the ORC complex at the origin of replication

Regulation of endogenous retroelements

Regulation of endogenous retroelements by KRAB-ZFP proteins

GO GSEA

translation

protein phosphorylation

phosphorylation

Upregulated

Downregulated

Table GO Over-representation Analysis of Downregulated Genes has no rows.